Matrice de design = 0, car pas d’a priori sur les données.
# Load the saved workspace; the code below reads `Data_Noesy_assigned` after it.
load("MultiDataSet.Rdata")

# Two-block input list (NOESY and OTU), both taken from Data_Noesy_assigned$X.
data <- list(
  NOESY = Data_Noesy_assigned$X$NOESY,
  OTU = Data_Noesy_assigned$X$OTU
)

## Check the dimensions of each block
lapply(data, dim)
## $NOESY
## [1] 846 84
##
## $OTU
## [1] 846 997
## 1 2 3
## 168 442 236

# Design matrix: square, one row/column per block, filled with 0.
design <- matrix(
  0,
  nrow = length(data),
  ncol = length(data),
  dimnames = list(names(data), names(data))
)
diag(design) <- 0
# design[, "OTU"] <- 0
# design["OTU", ] <- 0
design
## NOESY OTU
## NOESY 0 0
## OTU 0 0
Diablo_total
D’après le graphique, le taux d’erreur (ER : error rate) et le taux d’erreur global équilibré/moyenné (BER : balanced error rate) sont minimaux soit à 3 composantes pour l’ER, soit à 10 composantes pour le BER. La distance max semble donner une meilleure précision.
Compte tenu de cette distance et du BER, la sortie $choice.ncomp indique un nombre optimal de composantes pour le modèle DIABLO final. Il s’avère que c’est 1, probablement dû à un écart-type plus petit. Cependant, nous allons prendre 4 dimensions.
Si test effectué sur données assignées avec:
- Noesy et CPMG +OTU alors Noesy et CPMG =1 OTU sur ncomp= 4, 15 rep OTU = 12,2,75,1
- Noesy+OTU=rep20 Noesy ; 40 55 40 40 18 18 8 3 2 1 et OTU :95 35 105 75 1 16 2 1 1 6
# keepX values passed to block.splsda(): one value per component (10) for each
# block.
monkeepX <- list(
  NOESY = c(40, 55, 40, 40, 18, 18, 8, 3, 2, 1),
  OTU = c(95, 35, 105, 75, 1, 16, 2, 1, 1, 6)
)

# Fit the two-block model with 10 components. `Y` is not defined in this
# excerpt; it must already exist in the workspace.
# NOTE(review): the surrounding text mentions keeping 4 dimensions, but this
# call fits 10 components -- confirm which one is intended.
sgccda.res <- block.splsda(
  X = data,
  Y = Y,
  ncomp = 10,
  keepX = monkeepX,
  design = design
)
## Design matrix has changed to include Y; each block will be
## linked to Y.
## NOESY OTU Y
## NOESY 0 0 1
## OTU 0 0 1
## Y 1 1 0
# Variable plot of the fitted model: one plotting symbol, size and colour per
# block (two blocks here).
plotVar(
  sgccda.res,
  var.names = FALSE,
  style = "graphics",
  legend = TRUE,
  pch = c(16, 17),
  cex = c(2, 2),
  col = c("darkorchid", "brown1")
)

# Disabled variant for three blocks:
# plotVar(sgccda.res, var.names = FALSE, style = 'graphics', legend = TRUE,
#         pch = c(16, 17, 15), cex = c(2,2,2), col = c('darkorchid', 'brown1','lightgreen'))
# Disabled duplicate of the two-block call:
# plotVar(sgccda.res, var.names = FALSE, style = 'graphics', legend = TRUE,
#         pch = c(16, 17), cex = c(2,2), col = c('darkorchid', 'brown1'))
# Disabled circos plot for three blocks:
# circosPlot(sgccda.res, cutoff = 0.7, line = TRUE,
#            color.blocks= c('darkorchid', 'brown1', 'lightgreen'),
#            color.cor = c("chocolate3","grey20"), size.labels = 1.5)
# Circos plot of the fitted two-block model, using a 0.7 cutoff and one colour
# per block.
circosPlot(
  sgccda.res,
  cutoff = 0.7,
  line = TRUE,
  color.blocks = c("darkorchid", "brown1"),
  color.cor = c("chocolate3", "grey20"),
  size.labels = 1.5
)

# Write the NOESY/OTU network to a PNG file. The png() call used to be fused
# onto the closing parenthesis of circosPlot(), which is not valid R; it is
# now a statement of its own. `filename` is spelled out instead of relying on
# partial matching of `file`.
png(filename = "networkotu_Noesy_tabac.png", width = 800, height = 700)
fig_tabac <- network(
  sgccda.res,
  comp = list(NOESY = 1, OTU = 1),
  shape.node = rep("circle", 2),
  lty.edge = "solid",
  lwd.edge = 3,
  color.edge = color.spectral(50),
  blocks = c(1, 2),
  cutoff = 0.7
)
# Close the PNG device so the file is flushed to disk.
dev.off()
# network(sgccda.res, blocks = c(1,2,3),
# color.node = c('darkorchid', 'brown1', 'lightgreen'), cutoff = 0.4)
# Network_NOESY_OTU_tabac
# Load the saved workspace; the code below reads `Data_sex` after it.
load("MultiDataSet.Rdata")

# Three-block input list (NOESY, CPMG and OTU), all taken from Data_sex$X.
data <- list(
  NOESY = Data_sex$X$NOESY,
  CPMG = Data_sex$X$CPMG,
  OTU = Data_sex$X$OTU
)
# Disabled two-block alternative:
# data = list(NOESY= Data_Noesy_assigned$X$NOESY,
#             OTU = Data_Noesy_assigned$X$OTU)

## Check the dimensions of each block
lapply(data, dim)
## $NOESY
## [1] 846 215
##
## $CPMG
## [1] 846 202
##
## $OTU
## [1] 846 997
## 1 2
## 433 413

# Design matrix: start from 0.9 everywhere, zero the diagonal, then zero every
# entry involving OTU, so that only the NOESY/CPMG pair keeps 0.9.
design <- matrix(
  0.9,
  nrow = length(data),
  ncol = length(data),
  dimnames = list(names(data), names(data))
)
diag(design) <- 0
design[, "OTU"] <- 0
design["OTU", ] <- 0
design
## NOESY CPMG OTU
## NOESY 0.0 0.9 0
## CPMG 0.9 0.0 0
## OTU 0.0 0.0 0
##Tuning the number of components
Diablo_total
D’après le graphique, le taux d’erreur (ER : error rate) et le taux d’erreur global équilibré/moyenné (BER : balanced error rate) diminuent à 1 et/ou 4 composantes. La distance max semble donner une meilleure précision.
Compte tenu de cette distance et du BER, la sortie $choice.ncomp indique un nombre optimal de composantes pour le modèle DIABLO final. Il s’avère que c’est 1, probablement dû à un écart-type plus petit. Cependant, nous allons prendre 4 dimensions.
Quels que soient les tests réalisés avec 4 ou 7 composantes, avec les valeurs testées de keepX c(1:9, seq(10, 18, 2), seq(20, 50, 5), seq(55, 105, 10)) et 50 répétitions, nous obtenons les valeurs suivantes : NOESY et CPMG toujours = 1,1,1,1 ; OTU variable, mais dans ce cas = 14,2,1,1.
# Once the tuning runs are done, record the selected values here.
# Values for the 3-repetition case:
list.keepX <- list(
  NOESY = c(1, 1, 1, 1),
  CPMG = c(1, 1, 1, 1),
  OTU = c(16, 1, 1, 4)
)
# monkeepX = list(NOESY = c( 65,1,12,2,1, 4,75,10,30, 2),
#                 OTU = c( 105, 1,35, 85, 25,10, 6,85,105,12))

# Fit the three-block model with 4 components. `Y` is not defined in this
# excerpt; it must already exist in the workspace.
sgccda.res <- block.splsda(
  X = data,
  Y = Y,
  ncomp = 4,
  keepX = list.keepX,
  design = design
)
## Design matrix has changed to include Y; each block will be
## linked to Y.
## NOESY CPMG OTU Y
## NOESY 0.0 0.9 0 1
## CPMG 0.9 0.0 0 1
## OTU 0.0 0.0 0 1
## Y 1.0 1.0 1 0
## Contribution
# Variable plot of the three-block model: one plotting symbol, size and colour
# per block.
plotVar(
  sgccda.res,
  var.names = FALSE,
  style = "graphics",
  legend = TRUE,
  pch = c(16, 17, 15),
  cex = c(2, 2, 2),
  col = c("darkorchid", "brown1", "lightgreen")
)

# Circos plot of the three-block model with a 0.7 cutoff. This call used to be
# fused onto the closing parenthesis of plotVar(), which is not valid R.
circosPlot(
  sgccda.res,
  cutoff = 0.7,
  line = TRUE,
  color.blocks = c("darkorchid", "brown1", "lightgreen"),
  color.cor = c("chocolate3", "grey20"),
  size.labels = 1.5
)

# Author's note (originally "Code ne fonctionne pas :"): the code does not work:
#   Error in coord[[j]][, comp[[blocks[j]]] %in% int.comp] :
#     nombre de dimensions incorrect
# It works with 2 omics blocks (without CPMG, for example).
# NOTE(review): this file does not make clear which call raises the error
# (the plot above or the network() call that follows) -- confirm.
# Write the three-block network (component 1 of each block, cutoff 0.7) to a
# PNG file, then close the device.
png(filename = "network_tot_sex.png", width = 800, height = 700)
fig_bis <- network(
  sgccda.res,
  comp = list(NOESY = 1, CPMG = 1, OTU = 1),
  blocks = c(1, 2, 3),
  cutoff = 0.7
)
dev.off()

# Same network with explicit node/edge styling and a much lower cutoff (0.1).
# The result overwrites `fig_bis`.
# NOTE(review): no png() is opened before this call, so the dev.off() below
# closes whatever device happens to be current -- confirm this is intended.
fig_bis <- network(
  sgccda.res,
  comp = list(NOESY = 1, CPMG = 1, OTU = 1),
  shape.node = rep("circle", 3),
  lty.edge = "solid",
  lwd.edge = 3,
  color.edge = color.spectral(50),
  blocks = c(1, 2, 3),
  cutoff = 0.1
)
dev.off()
## [1] 846 215
## [1] 846 997
# rCCA with method = "shrinkage" and 3 components. `X` and `Y` are not defined
# in this excerpt; they must already exist in the workspace.
dat_No_OT.shrink <- rcc(X, Y, ncomp = 3, method = "shrinkage")
plot(dat_No_OT.shrink, type = "barplot")
# The reported values below were fused onto the plot() call, which made the
# line invalid R; they are kept as an output line.
## lambda1=0 lambda2=0.2 CVscore = 0.2611652
Paramètre_pénalisation
Temps d’exécution : Time difference of 36.44826 mins ; lambda1 = 0, lambda2 = 10, CVscore = 0.1708556
Paramètre_pénalisation2
# Regularisation parameters are hard-coded here instead of being read from a
# tuning object (the `cv$...` lookups are disabled).
# cv$opt.lambda1 #lambda1
# cv$opt.lambda2 #lambda12
# cv$opt.score #CV score
lambda1 <- 0
lambda2 <- 0.2
CVscore <- 0.2611652

par(mfrow = c(1, 1)) # back to a single-panel layout

# rCCA with the fixed penalties and 3 components. `X` and `Y` are not defined
# in this excerpt; they must already exist in the workspace.
data.rcc <- rcc(X, Y, ncomp = 3, lambda1 = lambda1, lambda2 = lambda2)
# data.rcc <- rcc(X,Y, ncomp = 3, lambda1 = cv$opt.lambda1,
#                 lambda2 = cv$opt.lambda2)

# The calls below used to be fused end-to-end on shared lines, which is not
# valid R; each one is now a separate statement.
plot(data.rcc, type = "barplot")

# Sample plot on components 1-2 in the XY-variate space, grouped by Data_sex$Y.
plotIndiv(
  data.rcc,
  comp = 1:2,
  ind.names = NULL,
  group = Data_sex$Y,
  rep.space = "XY-variate",
  legend = TRUE,
  title = "Noesy-OTU, rCCA XY-space"
)

# Same sample plot without rep.space.
plotIndiv(
  data.rcc,
  comp = 1:2,
  ind.names = NULL,
  group = Data_sex$Y,
  legend = TRUE,
  title = "Noesy-OTU, rCCA, each subspace"
)

## The plots could be overlaid, but that is unreadable with this many samples.
col.nutri <- color.mixo(as.numeric(Data_sex$Y))
plotArrow(data.rcc, col = col.nutri, title = "Noesy OTU, arrow plot")

# Variable plot on components 1-2 with a 0.1 cutoff, no variable names.
plotVar(
  data.rcc,
  comp = 1:2,
  cutoff = 0.1,
  var.names = c(FALSE, FALSE),
  cex = c(2, 2),
  title = "Noesy-OTU, rCCA comp 1 - 2, cutoff 0.1"
)
# Network_NOESY_OTU_sex
Cluster_NOESY_OTU_tabac